import json
from docx import Document
import requests
from app.config.secure import *


def ParseDocx(file):
    """Return the text of a .docx document, one non-empty paragraph per line.

    Args:
        file: Passed unchanged to ``docx.Document``.

    Returns:
        A string in which every paragraph with non-empty text is followed
        by a newline; paragraphs whose text is empty are skipped.
    """
    document = Document(file)
    # Keep only paragraphs that actually carry text.
    texts = [para.text for para in document.paragraphs if len(para.text) > 0]
    # Every kept paragraph is newline-terminated, including the last one.
    return ''.join(text + '\n' for text in texts)


def ParseDoc(filename, timeout=None):
    """Fetch the parsed text of a .doc file from the conversion service.

    Issues a GET to ``ParseDocUrl`` + ``filename`` and, on HTTP 200, decodes
    the JSON body, reading its ``char_sum`` and ``content`` fields.
    NOTE(review): ``ParseDocUrl`` is presumably provided by the star import
    from ``app.config.secure`` -- confirm.

    This is a best-effort call: any error is printed and whatever was
    gathered so far is returned; non-200 responses yield the defaults.

    Args:
        filename: Appended verbatim to ``ParseDocUrl`` to build the URL.
        timeout: Forwarded to ``requests.get``. ``None`` (the default)
            keeps the previous behaviour of waiting without a limit.

    Returns:
        A ``(char_sum, paper)`` tuple. ``char_sum`` is the service's
        ``char_sum`` value (``''`` if unavailable). ``paper`` is the content
        with the Word TOC field code and tabs removed, keeping only lines
        longer than one character, each terminated by a newline.
    """
    paper = ''
    char_sum = ''
    try:
        res = requests.get(f'{ParseDocUrl}{filename}', timeout=timeout)
        if res.status_code == 200:
            content = json.loads(res.content)
            char_sum = content['char_sum']
            # Raw string: the field code contains literal backslashes
            # (\o, \h, \u) that must not be treated as escape sequences.
            origin = content['content'].replace(r'TOC \o "1-3" \h \u ', '').replace('\t', '')
            # Drop empty and single-character lines.
            paper = ''.join(
                line + '\n' for line in origin.split('\n') if len(line) > 1
            )
    except Exception as e:
        # Best-effort: report the failure instead of hiding it, but let
        # KeyboardInterrupt/SystemExit propagate (a bare except would not).
        print(e)
    return char_sum, paper


def ParsePDF(filename, timeout=None):
    """Fetch the parsed text of a PDF from the conversion service.

    Issues a GET to ``ParsePdfUrl`` + ``filename`` and, on HTTP 200, decodes
    the JSON body, reading its ``char_sum`` and ``content`` fields.
    NOTE(review): ``ParsePdfUrl`` is presumably provided by the star import
    from ``app.config.secure`` -- confirm.

    Lines of the content are stripped and re-joined: a line gets a trailing
    newline only if it ends with sentence punctuation or is shorter than 25
    characters; other lines are concatenated directly with the next one.

    This is a best-effort call: any error is printed and whatever was
    gathered so far is returned; non-200 responses yield the defaults.

    Args:
        filename: Appended verbatim to ``ParsePdfUrl`` to build the URL.
        timeout: Forwarded to ``requests.get``. ``None`` (the default)
            keeps the previous behaviour of waiting without a limit.

    Returns:
        A ``(char_sum, paper)`` tuple; ``char_sum`` is the service's
        ``char_sum`` value (``''`` if unavailable) and ``paper`` the
        re-joined text.
    """
    paper = ''
    char_sum = ''
    try:
        res = requests.get(f'{ParsePdfUrl}{filename}', timeout=timeout)
        if res.status_code == 200:
            content = json.loads(res.content)
            char_sum = content['char_sum']
            # Remove the evaluation banner text from the content.
            origin = content['content'].replace('Evaluation Warning : The document was created with Spire.PDF for Java.', '')
            pieces = []
            for line in origin.split('\n'):
                line = line.strip()
                if not line:
                    continue
                # Treat the line as a paragraph end when it closes a sentence
                # or is short; otherwise glue it to the following line.
                if line.endswith(('。', '.', ';', '；')) or len(line) < 25:
                    line += '\n'
                # NOTE(review): the original re-checked len(line) > 1 here,
                # but that was always true (a one-character line has already
                # received its '\n'). If single-character lines were meant
                # to be dropped, that filter has to run before the append.
                pieces.append(line)
            paper = ''.join(pieces)
    except Exception as e:
        print(e)
    return char_sum, paper
